********************************************************************************
********************* ML MODEL PERFORMANCE *************************************
clear all
use "$maindir/Results/Model_Outputs/ihwap_wedge.dta"

** performance metrics
* in sample RMSE
gen tau_ml_sq = tau_ml^2
sum tau_ml_sq if treated==0 & twoyears_prepost==1 & meterdays_monthly!=.
sca mean_sq_err = `r(mean)'
di "MSE = " mean_sq_err
sca rmse = sqrt(mean_sq_err)
di "RMSE = " rmse
sum tau_ml if treated==0 & twoyears_prepost==1 & meterdays_monthly!=.
sca mean_resid = `r(mean)'

* cross validated RMSE
gen cvresids_sq = cvresids^2
sum cvresids_sq if treated==0 & twoyears_prepost==1 & meterdays_monthly!=.
sca mean_sq_cverr = `r(mean)'
di "CV-MSE = " mean_sq_cverr
sca cvrmse = sqrt(mean_sq_cverr)
di "CV-RMSE = " cvrmse
sum cvresids if treated==0 & twoyears_prepost==1 & meterdays_monthly!=.
sca mean_resid_cv = `r(mean)'


* simple errors
sum tau_ml if treated==0 & twoyears_prepost==1 & meterdays_monthly!=., detail
sum pct_tau_ml if treated==0 & twoyears_prepost==1 & meterdays_monthly!=., detail

*** percent error weighted by usage
gen wgt_inerrors_full = total_mmbtu*inerrors_full
egen tot_use_pre = total(total_mmbtu) if treated==0 & inerrors_full!=.
egen totwgt_inerrors_full_pre = total(wgt_inerrors_full) if treated==0 & inerrors_full!=.

gen avg_inerror_pre = totwgt_inerrors_full_pre/tot_use_pre

gen wgt_cverrors = total_mmbtu*cverrors
egen totwgt_cverrors_pre = total(wgt_cverrors) if treated==0 & cverrors!=.

gen avg_cverror_pre = totwgt_cverrors_pre/tot_use_pre


******* Distribution of errors
histogram inresids_full if treated==0 & twoyears_prepost==1 & meterdays_monthly!=. & inresids_full>-10 & inresids_full<10, ///
	percent xlabel(-10(1)10, labsize(small)) width(0.1) ///
	ytitle("Percent of Observations (%)") xtitle("In-Sample Residuals (MMBtu)") ///
	ylabel(0(1)8) yscale(range(0 8)) ///
	graphregion(color(white)) bgcolor(white)  bcolor(blue%30) ///
	text(3 3 "RMSE = `=scalar(round(rmse), 0.001)'", placement(e) color(red) size(small) just(left)) ///
	xline(`=scalar(mean_resid)', lcolor(red)) ///
	text(4.1 0.1 "Average Residual = `=scalar(round(mean_resid), 0.001)'", placement(e) color(red) size(small) just(left)) 
graph export "$maindir/Results/Figures/resids_hist_in.png", replace width(2500)

histogram cvresids if treated==0 & twoyears_prepost==1 & meterdays_monthly!=. & cvresids>-10 & cvresids<10, ///
	percent xlabel(-10(1)10, labsize(small)) width(0.1) ///
	ytitle("Percent of Observations (%)") xtitle("Cross-Validated Residuals (MMBtu)") ///
	ylabel(0(1)8) yscale(range(0 8)) ///
	graphregion(color(white)) bgcolor(white)  bcolor(blue%30) ///
	text(2 4 "RMSE = `=scalar(round(cvrmse), 0.001)'", placement(e) color(red) size(small) just(left)) ///
	xline(`=scalar(mean_resid)', lcolor(red)) ///
	text(3.1 0.1 "Average Residual = `=scalar(round(mean_resid_cv), 0.001)'", placement(e) color(red) size(small) just(left)) 
graph export "$maindir/Results/Figures/resids_hist_cv.png", replace width(2500)


** binning the mmbtu outcome variable
gen bin_mmbtu = .
forval i = 0(2)38 {
replace bin_mmbtu = `i' if total_mmbtu>`i' & total_mmbtu<=`i'+2
label define mmbtulabs `i' "(`i' `=scalar(`i')+2']", add
}
replace bin_mmbtu = 40 if total_mmbtu>40 & total_mmbtu!=.
label define mmbtulabs 40 ">40", add
label values bin_mmbtu mmbtulabs

**** prediction errors
*** number of observations in each bin of usage
gen aux = 1
sort bin_mmbtu
bysort bin_mmbtu : gen countbins = sum(aux) if treated==0 & twoyears_prepost==1 & meterdays_monthly!=.
bysort bin_mmbtu : egen freq_bins = max(countbins)
gen totobs = sum(aux) if treated==0 & twoyears_prepost==1 & meterdays_monthly!=.
egen tot_observations = max(totobs)
gen pct_obs_bins = 100*(freq_bins/tot_observations)
drop aux totobs countbins

***** regressions correlating model performance with the outcome
** plotting performance - in sample
reg inresids_full ibn.bin_mmbtu if treated==0 & twoyears_prepost==1 & meterdays_monthly!=., nocons vce(cluster Household)
est sto residsfull

reg inerrors_full ibn.bin_mmbtu if treated==0 & twoyears_prepost==1 & meterdays_monthly!=. & bin_mmbtu>0, nocons vce(cluster Household)
est sto errorsfull

reg pct_obs_bins ibn.bin_mmbtu if treated==0 & twoyears_prepost==1 & meterdays_monthly!=. , nocons
est sto pcthistobs

reg pct_obs_bins ibn.bin_mmbtu if treated==0 & twoyears_prepost==1 & meterdays_monthly!=. & bin_mmbtu>0, nocons
est sto pcthistobs2


coefplot residsfull (pcthistobs,  color(black) msymbol(none) axis(2) recast(connected) lpattern(dash) ciopts(lpattern(blank))) ///
		, vertical xlabel(,labsize(small) angle(45)) ytitle("ML Model Residuals (MMBtu)") ///
		ytitle("Percent Observations in Sample (%)", axis(2)) xtitle("Monthly Energy Usage (MMBtu)") ///
		graphregion(color(white)) bgcolor(white) legend(order(2 "Avg. Residual" 4 "% Observations")) nooffsets
graph export "$maindir/Results/Figures/mlresids_in.png", replace width(2500)



********************************************************************************
********************* plotting performance - cross-validated
clear all
program drop _all
do "$maindir/Code/bootstrapres.do"
use "$maindir/Results/Model_Outputs/ihwap_wedge.dta"

** binning the mmbtu outcome variable
gen bin_mmbtu = .
forval i = 0(2)38 {
replace bin_mmbtu = `i' if total_mmbtu>`i' & total_mmbtu<=`i'+2
label define mmbtulabs `i' "(`i' `=scalar(`i')+2']", add
}
replace bin_mmbtu = 40 if total_mmbtu>40 & total_mmbtu!=.
label define mmbtulabs 40 ">40", add
label values bin_mmbtu mmbtulabs


*** number of observations in each bin of usage
gen aux = 1
sort bin_mmbtu
bysort bin_mmbtu : gen countbins = sum(aux) if treated==0 & twoyears_prepost==1 & meterdays_monthly!=.
bysort bin_mmbtu : egen freq_bins = max(countbins)
gen totobs = sum(aux) if treated==0 & twoyears_prepost==1 & meterdays_monthly!=.
egen tot_observations = max(totobs)
gen pct_obs_bins = 100*(freq_bins/tot_observations)
drop aux totobs countbins


*** plotting performance-  out of sample, cross validated
reg cvresids ibn.bin_mmbtu if treated==0 & twoyears_prepost==1 & meterdays_monthly!=., nocons vce(cluster Household)
est sto residsfull

reg cverrors ibn.bin_mmbtu if treated==0 & twoyears_prepost==1 & meterdays_monthly!=. & bin_mmbtu>0, nocons vce(cluster Household)
est sto errorsfull

reg pct_obs_bins ibn.bin_mmbtu if treated==0 & twoyears_prepost==1 & meterdays_monthly!=. , nocons
est sto pcthistobs

reg pct_obs_bins ibn.bin_mmbtu if treated==0 & twoyears_prepost==1 & meterdays_monthly!=. & bin_mmbtu>0, nocons
est sto pcthistobs2


coefplot residsfull (pcthistobs,  color(black) msymbol(none) axis(2) recast(connected) lpattern(dash) ciopts(lpattern(blank))) ///
		, vertical xlabel(,labsize(small) angle(45)) ytitle("ML Model Residuals (MMBtu)") ///
		ytitle("Percent Observations in Sample (%)", axis(2)) xtitle("Monthly Energy Usage (MMBtu)") ///
		graphregion(color(white)) bgcolor(white) legend(order(2 "Avg. Residual" 4 "% Observations")) nooffsets
graph export "$maindir/Results/Figures/mlresids_cv.png", replace width(2500)



************************************************
**** errors by other characteristics

** blower pre
gen bin_blowerpre = .
forval i = 1000(500)9500 {
replace bin_blowerpre = `i' if Blower_Pre>`i' & Blower_Pre<=`i'+500
label define bplab `i' "(`i' `=scalar(`i')+500']", add
}
replace bin_blowerpre = 10000 if Blower_Pre>10000 & Blower_Pre!=.
replace bin_blowerpre = 500 if Blower_Pre<=1000 & Blower_Pre!=.
label define bplab 10000 ">10000", add
label define bplab 500 "<=1000", add
label values bin_blowerpre bplab

reg cvresids ibn.bin_blowerpre if treated==0 & twoyears_prepost==1 & meterdays_monthly!=., nocons
est sto residsfull

reg total_mmbtu ibn.bin_blowerpre if treated==0 & twoyears_prepost==1 & meterdays_monthly!=., nocons
est sto usebybin

suest residsfull usebybin, vce(cluster Household)

matrix coefs = (.)
matrix vars = (.)
forval i = 500(500)10000 {
	nlcom 100*([residsfull_mean]`i'.bin_blowerpre/[usebybin_mean]`i'.bin_blowerpre)
	matrix coef = r(b)
	matrix var = r(V)
	scalar nobs = e(N)
	matrix colnames coef = "`i'.bin_blowerpre"
	matrix colnames var = "`i'.bin_blowerpre"
	
	matrix coefs = (coefs, coef)
	matrix vars = (vars, var)
}

matrix coefs = coefs[1..., 2...]
matrix vars = vars[1..., 2...]
matrix vars = diag(vars)

bootstrapres coefs vars nobs
est sto errorsfull

coefplot errorsfull ///
		, vertical xlabel(,labsize(small) angle(45)) ytitle("CV Prediction Errors (%)") ///
		xtitle("Pre-Treament Blower Door Reading (CFM50)") ///
		graphregion(color(white)) bgcolor(white) legend(order(1 "95% CI" 2 "Avg. in Bin")) nooffsets
graph export "$maindir/Results/Figures/mlerrors_blowerpre_cv.png", replace width(2500)


** blower post
gen bin_blowerpost = .
forval i = 1000(500)5500 {
replace bin_blowerpost = `i' if Blower_Post>`i' & Blower_Post<=`i'+500
label define bplab2 `i' "(`i' `=scalar(`i')+500']", add
}
replace bin_blowerpost = 6000 if Blower_Post>6000 & Blower_Post!=.
replace bin_blowerpost = 500 if Blower_Post<=1000 & Blower_Post!=.
label define bplab2 6000 ">6000", add
label define bplab2 500 "<=1000", add
label values bin_blowerpost bplab2

reg cvresids ibn.bin_blowerpost if treated==0 & twoyears_prepost==1 & meterdays_monthly!=., nocons 
est sto residsfull

reg total_mmbtu ibn.bin_blowerpost if treated==0 & twoyears_prepost==1 & meterdays_monthly!=., nocons
est sto usebybin

suest residsfull usebybin, vce(cluster Household)

matrix coefs = (.)
matrix vars = (.)
forval i = 500(500)6000 {
	nlcom 100*([residsfull_mean]`i'.bin_blowerpost/[usebybin_mean]`i'.bin_blowerpost)
	matrix coef = r(b)
	matrix var = r(V)
	scalar nobs = e(N)
	matrix colnames coef = "`i'.bin_blowerpost"
	matrix colnames var = "`i'.bin_blowerpost"
	
	matrix coefs = (coefs, coef)
	matrix vars = (vars, var)
}

matrix coefs = coefs[1..., 2...]
matrix vars = vars[1..., 2...]
matrix vars = diag(vars)

bootstrapres coefs vars nobs
est sto errorsfull

coefplot errorsfull ///
		, vertical xlabel(,labsize(small) angle(45)) ytitle("CV Prediction Errors (%)") ///
		xtitle("Post-Treament Blower Door Reading (CFM50)") ///
		graphregion(color(white)) bgcolor(white) legend(order(1 "95% CI" 2 "Avg. in Bin")) nooffsets
graph export "$maindir/Results/Figures/mlerrors_blowerpost_cv.png", replace width(2500)



** square feet
gen bin_sqfeet = .
forval i = 750(250)2500 {
replace bin_sqfeet = `i' if sqfeet>`i' & sqfeet<=`i'+250
label define sqftlab `i' "(`i' `=scalar(`i')+250']", add
}
replace bin_sqfeet = 2750 if sqfeet>2750 & sqfeet!=.
replace bin_sqfeet = 500 if sqfeet<=750 & sqfeet!=.
label define sqftlab 2750 ">2750", add
label define sqftlab 500 "<=750", add
label values bin_sqfeet sqftlab

reg cvresids ibn.bin_sqfeet if treated==0 & twoyears_prepost==1 & meterdays_monthly!=., nocons
est sto residsfull

reg total_mmbtu ibn.bin_sqfeet if treated==0 & twoyears_prepost==1 & meterdays_monthly!=., nocons
est sto usebybin

suest residsfull usebybin, vce(cluster Household)

matrix coefs = (.)
matrix vars = (.)
forval i = 500(250)2750 {
	nlcom 100*([residsfull_mean]`i'.bin_sqfeet/[usebybin_mean]`i'.bin_sqfeet)
	matrix coef = r(b)
	matrix var = r(V)
	scalar nobs = e(N)
	matrix colnames coef = "`i'.bin_sqfeet"
	matrix colnames var = "`i'.bin_sqfeet"
	
	matrix coefs = (coefs, coef)
	matrix vars = (vars, var)
}

matrix coefs = coefs[1..., 2...]
matrix vars = vars[1..., 2...]
matrix vars = diag(vars)

bootstrapres coefs vars nobs
est sto errorsfull


coefplot errorsfull ///
		, vertical xlabel(,labsize(small) angle(45)) ytitle("CV Prediction Errors (%)")  ///
		xtitle("Floor Area (Square Feet)") ///
		graphregion(color(white)) bgcolor(white) legend(order(1 "95% CI" 2 "Avg. in Bin")) nooffsets
graph export "$maindir/Results/Figures/mlerrors_sqft_cv.png", replace width(2500)


** Real income
gen bin_income = .
forval i = 5(5)35 {
replace bin_income = `i' if Real_income1000>`i' & Real_income1000<=`i'+5
label define inclab `i' "(`i' `=scalar(`i')+5']", add
}
replace bin_income = 40 if Real_income1000>40 & Real_income1000!=.
replace bin_income = 0 if Real_income1000<=5 & Real_income1000!=.
label define inclab 40 ">40", add
label define inclab 0 "<=5", add
label values bin_income inclab

reg cvresids ibn.bin_income if treated==0 & twoyears_prepost==1 & meterdays_monthly!=., nocons
est sto residsfull

reg total_mmbtu ibn.bin_income if treated==0 & twoyears_prepost==1 & meterdays_monthly!=., nocons
est sto usebybin

suest residsfull usebybin, vce(cluster Household)

matrix coefs = (.)
matrix vars = (.)
forval i = 0(5)40 {
	nlcom 100*([residsfull_mean]`i'.bin_income/[usebybin_mean]`i'.bin_income)
	matrix coef = r(b)
	matrix var = r(V)
	scalar nobs = e(N)
	matrix colnames coef = "`i'.bin_income"
	matrix colnames var = "`i'.bin_income"
	
	matrix coefs = (coefs, coef)
	matrix vars = (vars, var)
}

matrix coefs = coefs[1..., 2...]
matrix vars = vars[1..., 2...]
matrix vars = diag(vars)

bootstrapres coefs vars nobs
est sto errorsfull

coefplot errorsfull ///
		, vertical xlabel(,labsize(small) angle(45)) ytitle("CV Prediction Errors (%)") ///
		xtitle("Real Income ($1000)") ///
		graphregion(color(white)) bgcolor(white)  legend(order(1 "95% CI" 2 "Avg. in Bin")) nooffsets
graph export "$maindir/Results/Figures/mlerrors_income_cv.png", replace width(2500)


** temperature
gen tmean = (tmax + tmin)/2

gen bin_temp = .
gen counter = 1
forval i = -5(5)20 {
replace bin_temp = counter if tmean>`i' & tmean<=`i'+5
qui : sum counter
label define templab `r(mean)' "(`i' `=scalar(`i')+5']", add
replace counter = counter+1
}
replace bin_temp = counter if tmean>25 & tmean!=.
replace bin_temp = 0 if tmean<=-5 & tmean!=.
qui : sum counter
label define templab `r(mean)' ">25", add
label define templab 0 "<=-5", add
label values bin_temp templab
drop counter

reg cvresids ibn.bin_temp if treated==0 & twoyears_prepost==1 & meterdays_monthly!=., nocons 
est sto residsfull

reg total_mmbtu ibn.bin_temp if treated==0 & twoyears_prepost==1 & meterdays_monthly!=., nocons
est sto usebybin

suest residsfull usebybin, vce(cluster Household)

matrix coefs = (.)
matrix vars = (.)
forval i = 0(1)7 {
	nlcom 100*([residsfull_mean]`i'.bin_temp/[usebybin_mean]`i'.bin_temp)
	matrix coef = r(b)
	matrix var = r(V)
	scalar nobs = e(N)
	matrix colnames coef = "`i'.bin_temp"
	matrix colnames var = "`i'.bin_temp"
	
	matrix coefs = (coefs, coef)
	matrix vars = (vars, var)
}

matrix coefs = coefs[1..., 2...]
matrix vars = vars[1..., 2...]
matrix vars = diag(vars)

bootstrapres coefs vars nobs
est sto errorsfull

coefplot errorsfull ///
		, vertical xlabel(,labsize(small) angle(45)) ytitle("CV Prediction Errors (%)") ///
		xtitle("Avg. Temperature in Bill Cycle (celsius)") ///
		graphregion(color(white)) bgcolor(white) legend(order(1 "95% CI" 2 "Avg. in Bin")) nooffsets
graph export "$maindir/Results/Figures/mlerrors_meantemp_cv.png", replace width(2500)



** month of year
label define months 1 "jan" 2 "feb" 3 "mar" 4 "apr" 5 "may" 6 "jun" 7 "jul" 8 "aug" 9 "sep" 10 "oct" 11 "nov" 12 "dec"
label values end_month months

reg cvresids ibn.end_month if treated==0 & twoyears_prepost==1 & meterdays_monthly!=., nocons
est sto residsfull

reg total_mmbtu ibn.end_month if treated==0 & twoyears_prepost==1 & meterdays_monthly!=., nocons
est sto usebybin

suest residsfull usebybin, vce(cluster Household)

matrix coefs = (.)
matrix vars = (.)
forval i = 1(1)12 {
	nlcom 100*([residsfull_mean]`i'.end_month/[usebybin_mean]`i'.end_month)
	matrix coef = r(b)
	matrix var = r(V)
	scalar nobs = e(N)
	matrix colnames coef = "`i'.end_month"
	matrix colnames var = "`i'.end_month"
	
	matrix coefs = (coefs, coef)
	matrix vars = (vars, var)
}

matrix coefs = coefs[1..., 2...]
matrix vars = vars[1..., 2...]
matrix vars = diag(vars)

bootstrapres coefs vars nobs
est sto errorsfull


coefplot errorsfull ///
		, vertical xlabel(,labsize(small) angle(45)) ytitle("CV Prediction Errors (%)") ///
		xtitle("Month of Year") ///
		graphregion(color(white)) bgcolor(white)  legend(order(1 "95% CI" 2 "Avg. in Month")) nooffsets
graph export "$maindir/Results/Figures/mlerrors_months_cv.png", replace width(2500)


** agency
reg cvresids ibn.AgencyID if treated==0 & twoyears_prepost==1 & meterdays_monthly!=., nocons 
est sto residsfull

reg total_mmbtu ibn.AgencyID if treated==0 & twoyears_prepost==1 & meterdays_monthly!=., nocons
est sto usebybin

suest residsfull usebybin, vce(cluster Household)

levelsof AgencyID if treated==0 & twoyears_prepost==1 & meterdays_monthly!=., local(agencies)
matrix coefs = (.)
matrix vars = (.)
foreach i of local agencies {
	nlcom 100*([residsfull_mean]`i'.AgencyID/[usebybin_mean]`i'.AgencyID)
	matrix coef = r(b)
	matrix var = r(V)
	scalar nobs = e(N)
	matrix colnames coef = "`i'.AgencyID"
	matrix colnames var = "`i'.AgencyID"
	
	matrix coefs = (coefs, coef)
	matrix vars = (vars, var)
}

matrix coefs = coefs[1..., 2...]
matrix vars = vars[1..., 2...]
matrix vars = diag(vars)

bootstrapres coefs vars nobs
est sto errorsfull

coefplot errorsfull ///
		, vertical xlabel(,labsize(small) angle(45)) ytitle("CV Prediction Errors (%)") ///
		xtitle("Agency ID") xlabel(1(1)35) ///
		graphregion(color(white)) bgcolor(white) legend(order(1 "95% CI" 2 "Avg. in Agency")) nooffsets
graph export "$maindir/Results/Figures/mlerrors_agency_cv.png", replace width(2500)



** family size
reg cvresids ibn.noccupants if treated==0 & twoyears_prepost==1 & meterdays_monthly!=., nocons 
est sto residsfull

reg total_mmbtu ibn.noccupants if treated==0 & twoyears_prepost==1 & meterdays_monthly!=., nocons
est sto usebybin

suest residsfull usebybin, vce(cluster Household)

matrix coefs = (.)
matrix vars = (.)
forval i = 1(1)9 {
	nlcom 100*([residsfull_mean]`i'.noccupants/[usebybin_mean]`i'.noccupants)
	matrix coef = r(b)
	matrix var = r(V)
	scalar nobs = e(N)
	matrix colnames coef = "`i'.noccupants"
	matrix colnames var = "`i'.noccupants"
	
	matrix coefs = (coefs, coef)
	matrix vars = (vars, var)
}

matrix coefs = coefs[1..., 2...]
matrix vars = vars[1..., 2...]
matrix vars = diag(vars)

bootstrapres coefs vars nobs
est sto errorsfull

coefplot errorsfull ///
		, vertical xlabel(,labsize(small) angle(45)) ytitle("CV Prediction Errors (%)") ///
		xtitle("Family Size (number of occupants)") xlabel(1(1)9) yscale(range(-2 5)) ylabel(-2(2)4) ///
		graphregion(color(white)) bgcolor(white) legend(order(1 "95% CI" 2 "Avg. in Bin"))  nooffsets
graph export "$maindir/Results/Figures/mlerrors_famsize_cv.png", replace width(2500)




************ errros by spending in measures
* binned dollars spent
foreach x in Real_tot_actAirSeal Real_tot_actBaseload Real_tot_actDoor Real_tot_actHealSfty Real_tot_actWtHtr {
	gen bin`x' = .
	forval i = 0(100)900 {
		replace bin`x' = `i' if `x'>`i'-100 & `x'<=`i'
		label define spendlab1 `i' "(`=scalar(`i')-100' `i']", modify
	}
	replace bin`x' = 1000 if `x'>900 & `x'!=.
	label define spendlab1 1 "0", modify
	label define spendlab1 1000 ">9000", modify
	label values bin`x' spendlab1
}
foreach x in Real_tot_actFoundation Real_tot_actGeneral Real_tot_actWindow {
	gen bin`x' = .
	forval i = 0(200)2000 {
		replace bin`x' = `i' if `x'>`i'-200 & `x'<=`i'
		label define spendlab2 `i' "(`=scalar(`i')-200' `i']", modify
	}
	replace bin`x' = 2200 if `x'>2000 & `x'!=.
	label define spendlab2 1 "0", modify
	label define spendlab2 2200 ">2000", modify
	label values bin`x' spendlab2
}
foreach x in Real_tot_actAttic Real_tot_actWallIns {
	gen bin`x' = .
	forval i = 0(300)2700 {
		replace bin`x' = `i' if `x'>`i'-300 & `x'<=`i'
		label define spendlab3 `i' "(`=scalar(`i')-300' `i']", modify
	}
	replace bin`x' = 3000 if `x'>2700 & `x'!=.
	label define spendlab3 1 "0", modify
	label define spendlab3 3000 ">2700", modify
	label values bin`x' spendlab3
}
foreach x in Real_oth_cost Real_tot_actFurnace {
	gen bin`x' = .
	forval i = 0(300)3000 {
		replace bin`x' = `i' if `x'>`i'-300 & `x'<=`i'
		label define spendlab4 `i' "( `=scalar(`i')-300' `i']", modify
	}
	replace bin`x' = 3300 if `x'>3000 & `x'!=.
	label define spendlab4 1 "0", modify
	label define spendlab4 3300 ">3000", modify
	label values bin`x' spendlab4
}
drop bin_TotalCost
gen bin_TotalCost = .
forval i = 0(500)9000 {
	replace bin_TotalCost = `i' if Real_total_cost>`i'-500 & Real_total_cost<=`i'
}
replace bin_TotalCost = 9500 if Real_total_cost>9000 & Real_total_cost!=.
replace bin_TotalCost = 500 if bin_TotalCost==0

foreach var of varlist binReal_tot_actAirSeal-binReal_tot_actWallIns {
	replace `var' = 1 if `var'==0
}
foreach x in Real_tot_actAirCon {
	gen bin`x' = .
	forval i = 0(100)500 {
		replace bin`x' = `i' if `x'>`i'-100 & `x'<=`i'
		label define spendlab5 `i' "(`=scalar(`i')-100' `i']", modify
	}
	replace bin`x' = 600 if `x'>500 & `x'!=.
	replace bin`x' = 1 if bin`x'==0
	label define spendlab5 600 ">500", modify
	label define spendlab5 1 "0", modify
	label values bin`x' spendlab5
}

drop binReal_oth_cost

foreach x in Attic AirCon AirSeal Baseload Door HealSfty WtHtr Foundation General Window Furnace WallIns {
	drop bin`x'
	rename binReal_tot_act`x' bin`x'
}

** fixing bins for attic
replace binAttic = 2700 if Real_tot_actAttic>2400 & Real_tot_actAttic!=.
forval i = 0(300)2400 {
	label define atticlab `i' "(`=scalar(`i')-300' `i']", modify
}
label define atticlab 1 "0", modify
label define atticlab 2700 ">2400", modify
label values binAttic atticlab


label var Real_tot_actAirCon "Air Conditioning"
label var Real_tot_actAirSeal "Air Sealing"
label var Real_tot_actAttic "Attic"
label var Real_tot_actBaseload "Baseload"
label var Real_tot_actDoor "Door"
label var Real_tot_actFoundation "Foundation"
label var Real_tot_actFurnace "Furnace"
label var Real_tot_actGeneral "General"
label var Real_tot_actHealSfty "Health and Safety"
label var Real_tot_actWallIns "Wall Insulation"
label var Real_tot_actWindow "Window"
label var Real_tot_actWtHtr "Water Heater"


** errors in percent
foreach x in Attic AirCon AirSeal Baseload Door HealSfty WtHtr Foundation General Window Furnace WallIns {
	reg cvresids ibn.bin`x' if treated==0 & twoyears_prepost==1 & meterdays_monthly!=., nocons 
	est sto residsfull
		
	reg total_mmbtu ibn.bin`x' if treated==0 & twoyears_prepost==1 & meterdays_monthly!=., nocons
	est sto usebybin

	suest residsfull usebybin, vce(cluster Household)

	levelsof bin`x' if treated==0 & twoyears_prepost==1 & meterdays_monthly!=., local(spendbins)
	matrix coefs = (.)
	matrix vars = (.)
	foreach i of local spendbins {
		nlcom 100*([residsfull_mean]`i'.bin`x'/[usebybin_mean]`i'.bin`x')
		matrix coef = r(b)
		matrix var = r(V)
		scalar nobs = e(N)
		matrix colnames coef = "`i'.bin`x'"
		matrix colnames var = "`i'.bin`x'"
		
		matrix coefs = (coefs, coef)
		matrix vars = (vars, var)
	}

	matrix coefs = coefs[1..., 2...]
	matrix vars = vars[1..., 2...]
	matrix vars = diag(vars)

	bootstrapres coefs vars nobs
	est sto errorsfull

local varlab : variable label Real_tot_act`x'
coefplot errorsfull ///
		, vertical xlabel(,labsize(small) angle(45)) ytitle("CV Prediction Errors (%)") ///
		xtitle("Amount Spent on `varlab' ($)") xlabel(, valuelabel) ///
		graphregion(color(white)) bgcolor(white) legend(order(1 "95% CI" 2 "Avg. in Bin")) nooffsets
graph export "$maindir/Results/Figures/mlerrors_`x'_cv.png", replace width(2500)
}

